---
title: "Inflation Disasters"
input:
  html_document: default
  pdf_document: default
extra_dependencies:
- blkarray
- amsmath



---

```{r setup, include = FALSE}
# default chunk options
knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE, results = 'asis')

# Define the path to the R Markdown file relative to the current working directory
script_path <- normalizePath("infl_disasterv3.rmd", mustWork = TRUE)

# Determine the directory containing the R Markdown file
director <- dirname(script_path)

# Define a relative path for output figures (go one level up and add "Figures")
path_figure_out <- file.path(dirname(director), "Figures")

# Set the working directory for knitting to the location of the R Markdown file
knitr::opts_knit$set(root.dir = director)
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Figure options
knitr::opts_chunk$set(fig.width = 16, fig.height = 5)

```

```{r initiate, echo = FALSE, include = FALSE}


#----------------------------------------------------------------
# init
#----------------------------------------------------------------


# clean console
cat("\014")

#----------------------------------------------------------------
# packages
#----------------------------------------------------------------


# list of dependencies
packages <- c("readxl",
              "dplyr",
              "rlang",
              "tidyr",
              "magrittr",
              "tibble",
              "zoo",
              "lubridate",
              "ggplot2",
              "grid",
              "ggpubr",
              "gridExtra",
              "kableExtra",
              "knitr",
              "purrr",
              "optimx",
              "nloptr",
              "latex2exp",
              "openxlsx",
              "stargazer",
              "summarize")

# install them if needed be
new_packages <-
  packages[!(packages %in% installed.packages()[,"Package"])] 

# installs missing packages
 if (length(new_packages) > 0) {
    install.packages(new_packages, dependencies = TRUE)
 }

# activate libraries
dependencies_out <- lapply(packages, require, character.only = TRUE)
# if (sum(unlist(dependencies_out)) < length(dependencies_out)) {
#   stop("Some dependencies not loaded successfully. Please ensure correct installation of missing packages.")
# }

# plot definitions 
theme_set(
  theme_minimal() +
            theme(panel.background = element_blank(),
                  plot.title = element_text(face = "plain", size = 25),
                  plot.subtitle = element_text(face = "italic"),
                  panel.grid.minor.x = element_blank(),
                  axis.ticks = element_blank(),
                  axis.line = element_line(),
                  legend.background = element_blank(),
                  legend.key = element_rect(colour = NA,
                                            fill = NA),
                  legend.text = element_text(size = 5),
                  axis.text  = element_text(size = 10),
                  axis.title = element_text(size = 20)
          )
)

```

```{r set_up_print_df}
knit_print.data.frame = function(x, ...) {
    res = paste(c("", "", knitr::kable(x)), collapse = "\n")
  knitr::asis_input(res)
}

registerS3method(
  "knit_print", "data.frame", knit_print.data.frame,
  envir = asNamespace("knitr")
)
```

## Data

#### 1. Jorda Schularick Taylor (JST) database

```{r load_jst}

raw <- file.path(dirname(director), "RawData")


jst_o <- read_excel(file.path(raw, "jorda_schularick_taylor_database.xlsx"), sheet = "Data")

# variables of interest: 
#     rgdppc (real gdp per capita: index, 2005 = 100) 
#     cpi (consumer price: index, 1990 = 100)

jst <- jst_o %>%
  select(year, country, rgdppc, cpi) %>%
  arrange(country, year)

# computations of rates of growth 
jst <- jst %>%
  filter_at(vars(cpi, rgdppc), ~ !is.na(.)) %>% # only IRL has NAs, all contiguous
  group_by(country) %>%
  mutate(cpi = 100*cpi/cpi[year == 2005], # cpi to same index year, 2005
         # use ratio for growth rates
         g = rgdppc/lag(rgdppc) - 1,
         pi= cpi/lag(cpi) - 1,
         # capping due to HUGE jumps in some observations (<10 observations)
         g = pmax(pmin(1, g), -1),
         pi = pmax(pmin(1, pi), -1),
         dpi = pi - lag(pi),
         pdpi = pi/lag(pi) - 1,
         date = date(paste0(year, "-12-31")) # date for plots
         ) %>%
  slice(-(1:2)) %>% # lost observation in computations
  ungroup() %>%
  select(date, country, everything(), -year)

```

#### 2. Barro (2006) disasters' database (from Maddison (2003))

```{r load_barro}

# notice that the two datasets are differentiated by the prefix b6- vs. bj-
barro <- read_excel(file.path(raw, "barro_2006_disasters_database.xlsx"))


# "b6_g_cycle_b": Beginning of cycle is a binary variable
# "b6_g_cycle":   The cycle variable has random numbers that differentiate different
#                 disaster periods. When not a disaster: NA
# "b6_duration":  Duration of cycles, NA when not disaster period
# "b6_g_cum":     Is the cumulative gdp loss during disaster, NA if not disaster

jst <- jst %>%
  mutate(year = year(date)) %>%
  merge(barro %>%
          mutate(b6_g_cycle_b = row_number(),
                 b6_duration = end - start + 1) %>%
          select(-end),
        by.x = c("country", "year"),
        by.y = c("country", "start"),
        all.x = TRUE) %>%
  merge(barro %>% select(-start),
        by.x = c("country", "year"),
        by.y = c("country", "end"),
        all.x = TRUE) %>%
  mutate(b6_g_cum = case_when(!is.na(g_cum.x) ~ g_cum.x,
                              !is.na(g_cum.y) ~ g_cum.y,
                              !is.na(lead(g_cum.x)) ~ 10^9, # upper barrier
                              !is.na(lag(g_cum.y)) ~ 10^9   # lower barrier
                              ) # otherwise NA
         ) %>%
  group_by(country) %>%
  fill(b6_g_cum, .direction = "downup") %>%
  mutate(b6_g_cycle_b = case_when(!is.na(b6_g_cycle_b) ~ 1,
                                  b6_g_cum == 10^9     ~ 0
                                  ), # otherwise NA
         b6_duration = case_when(b6_g_cum == 10^9 ~ 0,
                                 TRUE ~ b6_duration),
         b6_g_cum = ifelse(b6_g_cum == 10^9, NA, b6_g_cum),
         b6_g_disaster = ifelse(!is.na(b6_g_cum), 1, 0)) %>%
  fill(b6_duration, .direction = "down") %>%
  mutate(b6_duration = ifelse(b6_duration == 0, NA, b6_duration)) %>%
  ungroup() %>%
  select(-g_cum.x, -g_cum.y) %>%
  mutate(b6_g_cum = -1*b6_g_cum/100, # correct sign & percentage
         # uniformly distribute throughout period
         b6_g = exp(log(1+b6_g_cum) / b6_duration) - 1
         )

```

#### 3. Barro and Ursua 2008 dataset used in Barro and Jin (2011)

```{r load_barrou}

# notice that the two datasets are differentiated by the prefix b6- vs. bj-

# dataset (6 missing observations compared to original dataset)
barrou <- read_excel(file.path(raw, "barro_jin_data_2011_adjusted.xlsx"), sheet = "final dataset")

# "bj_g_cycle_b": Beginning of cycle is a binary variable
# "bj_g_cycle":   The cycle variable has random numbers that differentiate different
#                 disaster periods. When not a disaster: NA
# "bj_duration":  Duration of cycles, NA when not disaster period
# "bj_g_cum":     Is the cumulative gdp loss during disaster, NA if not disaster

jst <- jst %>%
  mutate(year = year(date)) %>%
  merge(barrou %>%
          mutate(bj_g_cycle_b = row_number(),
                 bj_duration = end - start + 1) %>%
          select(-end),
        by.x = c("country","year"),
        by.y = c("country","start"),
        all.x = TRUE) %>%
  merge(barrou %>% select(-start),
        by = "country",
        by.x = c("country","year"),
        by.y = c("country","end"),
        all.x = TRUE) %>%
  mutate(bj_g_cum = case_when(!is.na(g_cum.x) ~ g_cum.x,
                              !is.na(g_cum.y) ~ g_cum.y,
                              !is.na(lead(g_cum.x)) ~ 10^9, # upper barrier
                              !is.na(lag(g_cum.y)) ~ 10^9   # lower barrier
                              ) # otherwise NA
         ) %>%
  group_by(country) %>%
  fill(bj_g_cum, .direction = "downup") %>%
  mutate(bj_g_cycle_b = case_when(!is.na(bj_g_cycle_b) ~ 1,
                                  bj_g_cum == 10^9 ~ 0
                                  ), # otherwise NA
         bj_duration = case_when(bj_g_cum == 10^9 ~ 0,
                                 TRUE ~ bj_duration),
         bj_g_cum = ifelse(bj_g_cum == 10^9, NA, bj_g_cum),
         bj_g_disaster = ifelse(!is.na(bj_g_cum), 1, 0)) %>%
  fill(bj_duration, .direction = "down") %>%
  mutate(bj_duration = ifelse(bj_duration == 0, NA, bj_duration)) %>%
  ungroup() %>%
  select(-g_cum.x,-g_cum.y) %>%
  mutate(bj_g_cum = -1*bj_g_cum, # correct sign & percentage
         # uniformly distribute throughout period
         bj_g = exp(log(1+bj_g_cum)/bj_duration) - 1)


# Define the function to filter the jst dataset by start year
filter_jst_by_year <- function(jst_data, start_year) {
  jst_data %>%
    filter(year >= start_year)
}


# Apply the filtering function
#start_year_choice <- 1920 # Change this to 1972 or any other year if needed
#jst <- filter_jst_by_year(jst, start_year_choice)






```

## Computing Inflation disasters

### Glossary of Methods

|Method|Window  |Target
|------|--------|-------------------------
|1.1   |5-year  |Full Sample Censored Mean 
|1.2   |5-year  |Moving Censored Mean
|1.3   |5-year  |Bandwith High Pass Filter
|------|--------|-------------------------
|2.1   |Variable|Full Sample Censored Mean 
|2.2   |Variable|Moving Censored Mean
|2.3   |Variable|Bandwith High Pass Filter

Method differs on window computations technique and submethod differs on reference value used to determine a disaster

Example: x.y, method x to determine windows, submethod y to define reference value for disaster identification 

```{r aggregate_growth_rates}

# used to compute cumulative growth rates (when using ratios instead of logs)

# geometric average*n - 1
agg_gr <- function(x, na.rm = FALSE) {
  
  if (isTRUE(na.rm)) {
    x <- x[!is.na(x)]
  }
  gr <- exp(sum(log(1+x))) - 1
  return(gr)
  
}

```

### Method 1: Use five year windows for disaster occurrences

Window size: 5 years

```{r window}

wind <- 5

```

Use the *country specific inflation target* deviation per year as benchmark.

If cumulative inflation in window is above or below the cumulative sum of target then there is a disaster 

In the sample the windows are defined by the occurrence of a disaster. If in a year, the cumulative inflation for this and the next four years deviates sufficiently from the used target, that period is defined as a disaster. We conduct this same procedure for every year that already is not in a disaster window. As a result, the distance from disaster windows are not necessarily multiples of the window size.

```{r method 1}

 compute_windows <- function(df, x, method, wi = wind) {
# 
   #df: data frame with data
   #method: submethod used
   #target: reference variable to define disaster/non-disaster
   #wi:     window for disaster occurrences
# 
#     #create variables:
# 
   #cycle: NA if no cycle / integer if cycle
   cycle <-  paste0(x,"_cycle_",method)
   cycle_s <- sym(cycle)
# 
#   #beginning of cycle: 1 if yes/ 0 if no / NA if middle cycle
   cycle_b_s <- sym(paste0(x,"_cycle_b_",method))
# 
#   #cumulative drop in `x` during cycle: value if during cycle / NA otherwise
   cum <- paste0(x,"_cum_",method)
   cum_s <- sym(cum)
# 
#   #identify existence of disaster: 1 if disaster / 0 if none
   disaster <- paste0(x,"_disaster_",method)
   disaster_s <- sym(disaster)
# 
#   #target to define disaster: value to look at (no NAs)
   target <- paste0(x,"_target_",method)
   target_s <- sym(target)
# 
#   #cumulative target during window (no NAs)
   target_cum <- paste0(x,"_target_cum_",method)
   target_cum_s <- sym(target_cum)
# 
#   #variable of interest
   x_s <- sym(x)
# baseline whole sample
   df <- df  %>%
        group_by(country) %>%  #cumulative sum and disaster identification
         mutate(
               #`wind`-year cumulative "sum" of `x`
               !!cum_s := rollapply(!!x_s, FUN = agg_gr #FUN = sum
                                      , width = wind, align="left"
                                     , fill=NA)
               #`wind`-year cumulative "sum" of `x`
               ,!!target_cum_s := rollapply(!!target_s, FUN = agg_gr #FUN = sum
                                      , width = wind, align="left"
                                      , fill=NA)
               #deviation from target
               ,!!cycle_s := 1*(abs((!!cum_s) - (!!target_cum_s)) > (!!target_cum_s))
               ,!!cycle_s := ifelse(is.na(!!cycle_s), 0, (!!cycle_s)) #assign 0 in beginning
               ) %>%
        ungroup() %>%
         as.data.frame()
# case if change here for positive or negative
  
 
   #routine that looks for five year disaster episodes
   for (k in 1:nrow(df)) {
   #could only do this with a loop, need to find vectorized way
   #only makes sense considering previous operations
# 
    #print(k); print(df[k,cycle]); print(is.na(df[k,cycle]))
 
     if (is.na(df[k,cycle])) {
# 
         next() #already dismissed
# 
     }
# 
#     #Set windows to analyse
# 
#          #ensure end points work
# 
#     #beginning of df
     windback  <- min(wind-1,k-1)
#     #end of df
     windfront <- min(nrow(df)-k,wind-1)
# 
#     #end of country list
     if (windback>0) {
# 
       windback  <- max(which(rev(df$country[(k-windback):(k-1)]==df$country[k])))
       windback  <- max(windback,0)
# 
     }
#     #start of country list
     if (windfront > 0) {
# 
       windfront <- max(which(df$country[(k+1):(k+windfront)]==df$country[k]))
       windfront <- max(windfront,0)
# 
     }
# 
#     #if not enough data points to look in front assume no cycle
     if (windfront < (wind-1)) {
# 
       df[k:(k+windfront),cycle] <- 0
# 
     }
# 
#     #no cycle (if it were a within-cycle point it would be NA)
     if (df[k,cycle]==0) {
# 
        next() #continue
# 
     } else if (df[k,cycle]==1) {         #start of cycle
# 
#       #all exceptions taken care of, therefore can assign start of cycle
# 
       df[k,cycle] <- k                       #assign row number
       df[(k+1):(k+windfront),cycle] <- NA    #assign NA to rest of cycle
# 
     }
# 
     }
# 
   df <- df %>%
         mutate(
           #cumulative sum for cycle
           !!cum_s := ifelse(is.na(!!cycle_s), NA, !!cum_s)
           #start of cycle
          ,!!cycle_b_s := case_when(
                   !is.na(!!cycle_s)&(!!cycle_s)!=0 ~ 1
           ) # otherwise NA
         ) %>% #mark start of cycles
         #fill cycle periods with their corresponding values
         fill(!!cycle_s, !!cum_s, .direction="down") %>%
         #now assign 0 outside cycles (keep NA within cycles)
         mutate(!!cycle_b_s := case_when(
                   (!!cycle_b_s)==1 ~ 1
                  ,(!!cycle_s)==0 ~ 0                   #non-disaster period
                  )# otherwise, within cycle is NA
                #disaster
                ,!!disaster_s := ifelse((!!cycle_s)!=0, 1, 0)
                #if no disaster NA
                ,!!cycle_s := ifelse((!!disaster_s)==0, NA, (!!cycle_s))
                #if no disaster NA
                ,!!cum_s := ifelse((!!disaster_s)==0, NA, (!!cum_s))) %>%
         ungroup()
 
 return(df)
 
 }

```

```{r mean_bounds}

 w_lb <- 0.25
 w_ub <- 0.75

```

### Method 1.1: Use full sample's country-specific mean of truncated sample at 25 and 75 percentiles 
#### (except for the US - use 2% target here)

```{r method 1.1}

 jst <- jst  %>%
   group_by(country)  %>% 
   # reference value to determine disaster periods
   mutate(pi_q = ifelse(pi >= quantile(pi, w_lb) & pi <= quantile(pi, w_ub), pi, NA), # censor tails
          pi_target_1.1 = mean(pi_q, na.rm = TRUE),
          pi_target_1.1 = ifelse(country == "USA", 0.02, pi_target_1.1)) %>% 
   select(-pi_q) %>%
   ungroup() %>%
   # determine disaster windows
   compute_windows("pi", "1.1", wind) 

```

### Method 1.2: Use moving average of truncated country-specific sample at 25 and 75 percentile on a 20-year window
#### (for first 20 years use full sample censored mean)

```{r method 1.2}

 wi_ma       <- 20 # years
 fill_window <- 20 # years: for how many years to use method 1.1 (full sample censored mean), default is same as wi_ma
# 
 censored_mean <- function(x, lb = w_lb, ub = w_ub, wi = wi_ma) {
#   
   y <- x[x >= quantile(x, w_lb) & x <= quantile(x, w_ub)] 
   out <- mean(y, na.rm = TRUE)
   return(out)
#   
 }
# 
 jst <- jst  %>% 
   group_by(country)  %>% 
#   # reference value to determine disaster periods
   mutate(pi_target_1.2 = rollapply(pi,
                                    FUN = censored_mean,
                                    width = wi_ma,
                                    align = "right",
                                    fill = NA),
          pi_target_1.2 = ifelse(row_number() <= fill_window,
                                 pi_target_1.1,
                                 pi_target_1.2)
          ) %>%
   ungroup() %>% 
   # determine disaster windows
   compute_windows("pi", "1.2", wind)

```

### Method 1.3: Use Band-Pass filter to define moving inflation target
#### Butter high-pass Filter with cut-off frequency: 20 years

```{r method 1.3}

  # frequency cut-off (remove above freq)
 # frq <- 20 # years 

#  jst <- jst %>%
#    group_by(country) %>%
 #   mutate(pi_target_1.3 = mFilter::bwfilter(pi, freq = frq)$trend)
  
 # ) %>%
  #   ungroup() %>%
  #   # determine disaster windows
  #   compute_windows("pi", "1.3", wind)


frq <- 20 # years 

jst <- jst %>%
  group_by(country) %>%
  mutate(pi_target_1.3 = mFilter::bwfilter(pi, freq = frq)$trend) %>%
  ungroup() %>%
  # determine disaster windows
  compute_windows("pi", "1.3", wind)

```

### Method 2

Same as method 1 but windows are now determined by sequential peaks and troughs:
  
  1. A trough is an observation where $$y_t - \Delta y_t < 0, \forall t \in \{-2, -1, 1, 2 \};$$ 
  
  2. In contrast, a peak is an observation where $$y_t - \Delta y_t > 0, \forall t \in \{-2, -1, 1, 2 \}.$$
  

```{r method 2}

bbq_duration <- 2 # what length around any point to define peak and trough

bbq_cycles <- function(df, x, duration = bbq_duration) {
  
  # construct variable names
  timeit <-     sym(paste0(x, "_timing_bbq"))  # is enum: going up (-1) or going down (1)?
  cycle_s <-    sym(paste0(x, "_cycle_bbq"))   # is int:  number of cycle
  cycle_b_s <-  sym(paste0(x, "_cycle_b_bbq")) # is enum: is last date of up/down (1) or not (NA)
  duration_s <- paste0(x, "_duration_bbq")     # is int:  duration of the cycle which obs in currently part of
  x <- sym(x)
  
  # bbq (?)
  my_bbq <- function(x) {
    # x is the centered time-series
    T <- length(x)
    k0 <- round(T/2)+1
    
    y <- x[c(1:(k0-1), (k0+1):T)] - x[k0]
    
    # going up (-1), going down (1), or neither (0)
    y <- 1*(all(y > 0) - all(y < 0)) 
    
    return(y)
  }
  
  # identify going ups/going downs
  out <- df %>%
    group_by(country) %>%
    mutate(!!timeit := rollapply((!!x),
                                 width = (2*duration + 1),
                                 FUN = my_bbq,
                                 fill = NA,
                                 align = "center"),
           !!timeit := case_when(
             (!!timeit) != 0 ~ (!!timeit)
             ) # otherwise NA
           ) %>%
    # define the period up until peak/trough as going up/going down.
    # for each country, partial rolling window t = 0, 1 at the beginning are filled up as a convenient side effect
    fill(!!timeit, .direction = "up") %>%
    ungroup() %>%
    mutate(!!cycle_b_s := case_when((!!timeit) != lead(!!timeit) | (country) != lead(country) ~ 1
                                    ) # otherwise NA
           ) %>%
    mutate(!!cycle_s :=   case_when(!!cycle_b_s == 1 ~ row_number()
                                    ) # otherwise NA
           ) %>%
    fill(!!cycle_s, .direction = "up") %>% # this way can number cycles
    # only NAs left in !!timeit: end of period NAs, fill down
    fill(!!timeit, .direction = "down") %>%
    group_by(!!cycle_s) %>%
    add_count(name = duration_s) %>%
    ungroup()
  
  return(out)
}

# add cycles determined by bbq
jst <- bbq_cycles(jst, "pi", bbq_duration)

# function to use target to determine disaster
bbq_disaster <- function(df = jst, x, method) {
  
  # construct variable names
  cycle_s <-    sym(paste0(x, "_cycle_", method))      # is int: cycle number if disaster
  cycle_b_s <-  sym(paste0(x, "_cycle_b_", method))    # is enum: disaster and last period of cycle (1), or no disaster (0),
                                                       #          or disaster but not last period of cycle (NA)
  disaster <-   sym(paste0(x, "_disaster_", method))   # is bool: |target_cum - cum| > target_cum ? 1 : 0
  target <-     sym(paste0(x, "_target_", method))     # is float: target of variable of interest (e.g. pi)
  target_cum <- sym(paste0(x, "_target_cum_", method)) # is float: aggregated version of "target"
  x <-          sym(x)                                 # is float: variable of interest (e.g. pi)
  x_cum <-      sym(paste0(x, "_cum_", method))        # is float: aggregated version of variable of interest
  
  out <- df %>%
          group_by(pi_cycle_bbq) %>% # everything below within a cycle
          mutate(
            # "agg_gr": geometric average
            !!target_cum := agg_gr(!!target), #sum(!!target)
            !!x_cum := agg_gr(!!x), #sum(!!x)
            !!disaster  := 1*( abs((!!target_cum) - (!!x_cum)) > (!!target_cum) ),
            !!cycle_b_s := case_when((!!disaster) == 1 & pi_cycle_b_bbq == 1 ~ 1,
                                     (!!disaster) == 0                       ~ 0
                                     ), # otherwise NA
            !!cycle_s := ifelse((!!disaster) == 1 & pi_cycle_bbq != 0, # BNZ: pi_cycle_bbq != 0 always holds?
                                pi_cycle_bbq, NA)
            ) %>%
    ungroup()
  
}

```

### Method 2.1: Use full sample's country-specific mean of truncated sample at 25 and 75 percentiles 
#### (except for the US - use 2% target here)

```{r method 2.1}

 jst <- jst  %>% 
   # reference value to determine disaster periods
  mutate(pi_target_2.1 = pi_target_1.1) %>%
   # determine disaster windows
   bbq_disaster("pi", "2.1")

```

### Method 2.2: Use moving average of truncated country-specific sample at 25 and 75 percentile on a 20-year window
#### (for first 20 years use full sample censored mean)

```{r method 2.2}

 jst <- jst  %>% 
  # reference value to determine disaster periods
   mutate(pi_target_2.2 = pi_target_1.2) %>%
   # determine disaster windows
   bbq_disaster("pi", "2.2")

```

### Method 2.3: Use Band-Pass filter to define moving inflation target
#### Butter high-pass Filter with cut-off frequency: 20 years

```{r method 2.3}

jst <- jst  %>% 
  # reference value to determine disaster periods
  mutate(pi_target_2.3 = pi_target_1.3) %>%
  # determine disaster windows
  bbq_disaster("pi", "2.3")

```

```{r glossary}

# methods
disasters <- c(1.1, 1.2, 1.3, 2.1, 2.2, 2.3)
#disasters <- c(2.3)
disasters_plots <- c(2.3)

# names for disasters
disaster_names <- paste0("Method ", disasters)

# colours for plots
clrs <- c("red", "blue", "orange", "green", "purple", "brown")


# fight fire with fire
jst_deflation <- jst
jst_inflation <- jst

jst_deflation_alt <- jst
jst_inflation_alt <- jst

for (d in disasters) {

  sub_s <- sym(paste0("pi_sub_", d))
  cum_s <- sym(paste0("pi_cum_", d))
  disaster_s <- sym(paste0("pi_disaster_", d))
  cycle_s <- sym(paste0("pi_cycle_", d))
  cycle_b_s <- sym(paste0("pi_cycle_b_", d))
    
  na_keep <- function(x, y) {
    return(ifelse(!is.na(x) & x != 0 & y == 0, NA, x))
  }
  
  jst_deflation <- jst_deflation %>%
    mutate((!!sub_s) := (!!cum_s) < 0, # is (0) if inflation -> NA, (1) if stay in subsample
           (!!cycle_b_s) := na_keep((!!cycle_b_s), (!!sub_s)), # key variable to compute unconditional probability
           (!!cycle_s) := na_keep((!!cycle_s), (!!sub_s)),
           (!!disaster_s) := na_keep((!!disaster_s), (!!sub_s))
           )
  
  jst_inflation <- jst_inflation %>%
    mutate((!!sub_s) := (!!cum_s) >= 0, # is (0) if deflation -> NA, (1) if stay in subsample
           (!!cycle_b_s) := na_keep((!!cycle_b_s), (!!sub_s)), # key variable to compute unconditional probability
           (!!cycle_s) := na_keep((!!cycle_s), (!!sub_s)),
           (!!disaster_s) := na_keep((!!disaster_s), (!!sub_s))
           )
  
  # the alt(ernative) versions merely generate the indicator, so one needs to operate using the indicator in ifelse-evals
  # explicitly, while in the verisons above it's taken care of via NAs.
  # the alt-versions however allow conditioning on any disaster rather than just the subsample disasters.
  
  jst_deflation_alt <- jst_deflation_alt %>%
    mutate((!!sub_s) := (!!cum_s) < 0)
  
  jst_inflation_alt <- jst_inflation_alt %>%
    mutate((!!sub_s) := (!!cum_s) >= 0)
}

#jst <- jst_deflation #pass here which subsample it is deflation (negative) or inflation (positive), nothing if just decomment

```

### Visualization of Disaster Identification


```{r pi_plots}

plotr <- list()
i <- 1
for (d in disasters_plots) {

  #title <- paste0("Inflation Disaster, Method ", d)
  #title <- "Historical Inflation Disasters"
  title <- ""
  
  d_s <- sym(paste0("pi_disaster_", d))
  
  # plot
  plott <- ggplot(jst, aes(x = date, y = (!!d_s))) +
            geom_area(aes(fill = country), alpha = 0.5) +
            ggtitle(title) +
            scale_y_continuous( expand = c(0, 0)) +
            scale_x_date(date_breaks = "5 years",
                         date_labels = "%Y") +
            ylab("count") + xlab("date")
  
  # combine plots
  plotr[[i]] <- plott
 
  # print plot
  print(plott)
  ggsave(paste0("../Figures/fig_pitimeseries_", d, ".pdf"), device = "pdf")
   
  i <- i+1
  
}

#ggarrange(plotlist = plotr, ncol = length(plotr))

```

## Probability of disaster

Disaster probability definitions:

1. Probability of disaster means the probability of a 5-year disaster occurring in a random year (not already included in any disaster window).

2. The probability of an input disaster is the probability of entering a disaster in a random year (not already included in a disaster period)

3. Probability of at least 1 year of overlap in disaster types conditional on inflation disaster

```{r probabilities_of_disaster,}

# Unconditional probability of inflation disaster
my_pi_prob <- function(x, df = jst) {
  
  x <- paste0("pi_cycle_b_", x)
  # reminder: pi_cycle_b_[method nr.] is (1) if last period of disaster cycle,
  # (0) if no disaster in current cycle, (NA) otherwise, i.e. disaster but is not last period
  out <- round( sum(df[,x] == 1 & !is.na(df[,x])) / sum(!is.na(df[,x])), 4)
  #out <- round(sum(df[,x] == 1 | is.na(df[,x])) / nrow(df) , 4)
  # BNZ: if df[,x] == 1 then "!is.na" trivially?
  # BNZ: currently only counts one disaster obs per cycle / (one disaster obs per cycle + every obs for no-disaster cycles)
  #     though consistent with g_probabilities below
  return(out)
  
}
p_pi <- lapply(disasters, my_pi_prob) 
names(p_pi) <- disaster_names

# Unconditional probability of gdp disaster

# Barro (2006)
# restrict to countries in both samples (BNZ: that's not what this is doing?)
temp <- jst %>% filter(country %in% unique(barro$country))
b6_p_g <- round(sum(temp[,"b6_g_cycle_b"] == 1 & !is.na(temp[,"b6_g_cycle_b"]))
                / sum(!is.na(temp[,"b6_g_cycle_b"]) ),
                4)

# Barro and Jin (2011)
# restrict to countries in both samples (BNZ: that's not what this is doing?)
temp <- jst %>% filter(country %in% unique(barrou$country))
bj_p_g <- round(sum(temp[,"bj_g_cycle_b"] == 1 & !is.na(temp[,"bj_g_cycle_b"]))
                / sum(!is.na(temp[,"bj_g_cycle_b"]) ),
                4)

# joint disaster probabilities
#       hard to define since they don't perfectly overlap...
#       as a result the easiest is to understand what is the probability
#       of the cycles overlapping for at least one year, conditional
#       on an inflation disaster...
conditional_joint_disasters <- function(method, barro, df) {
  
  # df: data frame with data
  # method: submethod used
  # barro: dataset for g
  
  # create variables:
  
  # inflation
  x <- "pi"
  
  # cycle: integer with cycle number if disaster, NA otherwise
  cycle <-  paste0(x, "_cycle_", method)
  cycle_s <- sym(cycle)
   
  # beginning of cycle: 1 if yes/ 0 if no / NA if middle cycle
  cycle_b_s <- sym(paste0(x, "_cycle_b_", method))
  
  # identify existence of disaster: 1 if disaster / 0 if none
  disaster <- paste0(x, "_disaster_", method)
  disaster_s <- sym(disaster)
  
  # disasters: (1) if cycle with joint disaster, (0) if only inflation disaster, (NA) if neither
  g_pi_disaster <- paste0(barro, "_g_pi_disaster_", method)
  g_pi_disaster_s <- sym(g_pi_disaster)

  # beginning of cycle of inflation and gdp disasters: 
  # 1 if yes/ 0 if no / NA if middle cycle
  z <- paste0(barro, "_g_pi_cycle_b_", method)
  z_s <- sym(z)

  # g disaster: 1 if disaster 0 otherwise
  g_disaster <- paste0(barro, "_g_disaster")
  g_disaster_s <- sym(g_disaster)
  
  # beginning of cycle of inflation and gdp disasters: 
  # last row number if yes/ NA if not
  y <- paste0("g_pi_cycle_", method)
  y_s <- sym(y)
  
  # check if there is at least one year of disaster overlap
  df <- df %>%
        mutate(!!g_pi_disaster_s := case_when(
          (!!g_disaster_s) == 1 & !is.na(!!cycle_s) ~ 1,
          (!!g_disaster_s) == 0 & !is.na(!!cycle_s) ~ 0
          ) # (1) if g_disaster and inflation disaster, (0) if only inflation disaster, otherwise NA
        ) %>% 
        group_by(!!cycle_s) %>% 
        # #check minimum year of joint disaster
        # mutate(!!z_s := sum(!!g_pi_disaster_s) #otherwise NA (within-joint-cycle)
        #       ,!!z_s := case_when(
        #           (!!z_s)==win   & (!!cycle_b_s)==1 ~ 1 #start of cycle
        #          ,(!!z_s) < win                     ~ 0 #not joint cycle
        #          ,(!!g_pi_disaster_s)==0            ~ 0 #not joint disaster
        #          ) #mid_cycle NA
        #       ,!!y_s := !!z_s #get beginning of cycle
        #       ) %>% 
        # fill(!!y_s, .direction="down") %>%
        # mutate(!!y_s := ifelse((!!y_s)==0, NA, (!!y_s))) %>% 
        mutate(!!g_pi_disaster_s := case_when(sum(!!g_pi_disaster_s) > 0  ~ 1,
                                              sum(!!g_pi_disaster_s) == 0 ~ 0
                                              ) # if in a cycle there is any g_disaster and inflation disaster, code entire cycle as joint disaster. if any inflation disaster but no g_disaster, code entire cycle as inflation disaster (should be redundant step). otherwise NA
               ) %>%
        ungroup()

    return(df)
}

for (d in disasters) {
  jst <- conditional_joint_disasters(d, "b6", jst) # Barro (2006)
  jst <- conditional_joint_disasters(d, "bj", jst) # Barro, Jin (2011)
}

# probability computations
my_pi_g_prob <- function(method, barro, df2, df) {
  
  # restrict to countries in both samples
  df <- df %>% filter(country %in% unique(df2$country))
  
 # print(sum(!is.na(unique(df$country))))
  
  cycle <- paste0(barro, "_g_pi_disaster_", method)
 # print(cycle)
  p <- round(sum(df[,cycle] == 1 & !is.na(df[,cycle])) / sum(!is.na(df[,cycle])), 4)
  # probability of joint disaster (= 1) conditional on inflation disaster (either 0 or 1)
  
  return(p)
}

b6_p_pi_g <- lapply(disasters, my_pi_g_prob, barro = "b6", df2 = barro, df = jst) # probabilities
names(b6_p_pi_g) <- disaster_names

bj_p_pi_g <- lapply(disasters, my_pi_g_prob, barro = "bj", df2 = barrou, df = jst) # probabilities
names(bj_p_pi_g) <- disaster_names

# input
t_g <- rbind(unlist(b6_p_g),
             unlist(bj_p_g))
t_g <- as.data.frame(paste0(round(100*t_g, 1), "%"))
rownames(t_g) <- c("Barro (2006)", "Barro&Jin (2011)")
colnames(t_g) <- "Unconditional Probability of input Disaster" # BNZ: changed from "Inflation Disaster"
#kable(t_g, "html") %>% kable_styling()
stargazer(t_g, type = 'html', summary = FALSE, float = FALSE,
          out = "../input/t_g.tex")

# inflation
t_pi <- as.data.frame(rbind(unlist(p_pi),
                            unlist(b6_p_pi_g),
                            unlist(bj_p_pi_g)))
t_pi <- as.data.frame(lapply(t_pi, function(x) paste0(round(100*x, 1), "%")))
colnames(t_pi) <- paste0("Method ", disasters)
rownames(t_pi) <- c("Unconditional Probability of Inflation Disaster",
                    "Probability of at least 1 year joint disaster conditional on inflation disaster (Barro 2006)",
                    "Probability of at least 1 year joint disaster conditional on inflation disaster (Barro&Jin 2011)")
#kable(t_pi, "html") %>% kable_styling()
#stargazer_input <- capture.input(stargazer(t_pi,type = 'html', summary = FALSE, float = FALSE, out = "../input/t_pi.tex"))
stargazer(t_pi, type = 'html', summary = FALSE, float = FALSE, out = "../input/t_pi.tex")

```

## Probability Distributions of $z$

Throughout we are making use of Barro's transformation $z = \frac{1}{1-b}, b = -g$

```{r mle}

# one-sided pareto distribution
pareto_mle <- function(X) {
  # https://stats.stackexchange.com/questions/27426/how-do-i-fit-a-set-of-data-to-a-pareto-distribution-in-r/175951
  n <- length(X) 
  m <- min(X)
  a <- n/sum(log(X/m))
  return(c(Zo = round(m, 2), Alpha = round(a, 2))) 
}

# two-sided symmetric pareto distribution with peak at `b`<>0
symmetric_pareto_mle <- function(data){
  
  loglik <- function(params,x) {
    
    a <- params[[1]]
    b <- params[[2]]
    
    f <- a*(b^a)/(2*(b+abs(x-b))^(a+1))
    
    logf <- -log(f/2) #"/2" for support, and "-" for likelihood
    
    loglik = sum(logf)
    
    return(loglik)
    
  }
  
  # maximize log likelihood
  p_init <- c(1, 1)    # a = 0, alpha = 2 
  min_loglik <- optim(p_init, loglik, x = data)
  parameters <- min_loglik$par
  
  return(parameters)
  
}

```

### The sample in the computations includes **only** years with both gdp and inflation disasters

#### Replicate Barro & Jin's (2011) results

```{r replicate_barro_jin}

barrou$z <- 1/(1-barrou$g_cum) #z transformation

# table
temp <- as.data.frame(pareto_mle(barrou$z))
colnames(temp) <- "Barro & Jin's (2011) estimates" 
knitr::kable(temp, "html")

```

### Fit z's distribution conditional on inflation and input disaster, using Barro and Jin (2011) data


```{r fit_conditional_dist}

fit_cd <- function(barro, agg = 0, g_data = "barro", df = jst, ds = disasters, 
                   plot_factor = 10, plot_disasters = NULL, plot_colors = NULL) {
  
  pareto_parameters <- data.frame(Zo = character(0), Alpha = character(0))
  dstats <- data.frame(Min = character(0),
                       Mean = character(0),
                       StDev = character(0))
  plotr <- list()  # Initialize plot list
  i <- 1
  for (d in ds) {
  
    # inflation disaster variables to use
    pi_disaster <- sym(paste0("pi_disaster_", d))
    pi_cycle <- sym(paste0("pi_cycle_", d))
   
    # input disaster variables to use
    g_disaster <- sym(paste0(barro, "_g_disaster"))
    
    # specifications for different data used
    if (agg) {
      my_agg <- agg_gr
      g_s <- sym(paste0(barro, "_g"))
    } else {
      my_agg <- function(x, y) {
        x <- x[!duplicated(y)]
        if (length(x) > 0) {
          return(agg_gr(x, na.rm = TRUE))
        } else {
          return(0)
        }
      }
      g_s <- sym(paste0(barro, "_g_cum"))
    }

    if (g_data == "jst") {
      my_agg <- agg_gr
      g_s <- sym("g")
    }

    g_cdist <- jst %>%
      mutate(disaster = ifelse((!!pi_disaster) == 1 & (!!g_disaster) == 1, 1, 0)) %>%
      filter(disaster == 1) %>%
      select(date, country, (!!pi_cycle), (!!g_s)) %>%
      group_by((!!pi_cycle)) %>%
      summarize(g = my_agg((!!g_s), (!!pi_cycle)), 
                start = first(date),
                end = last(date)) %>%
      mutate(z = 1/(1+g)) %>%
      ungroup()
    
    z_min <- min(g_cdist$z, na.rm = TRUE)
    z_max <- max(g_cdist$z, na.rm = TRUE)
    
    if (!is.finite(z_min) || !is.finite(z_max) || z_min == z_max) {
      warning(paste("Non-finite or zero range in z values detected for Method", d, 
                    "- using fallback values"))
    }

    dstat <- c(z_min, mean(g_cdist$z, na.rm = TRUE), sd(g_cdist$z, na.rm = TRUE))
    dstats[i,] <- round(dstat, 3)
    
    parameters <- pareto_mle(g_cdist$z)
    pareto_parameters[i,] <- parameters
    
    # Check if the current disaster should be plotted
    if (!is.null(plot_disasters) && d %in% plot_disasters) {
      color_index <- match(d, plot_disasters)
      plot_color <- ifelse(!is.null(plot_colors) && length(plot_colors) >= color_index, 
                           plot_colors[color_index], 
                           "gray")
      
      sup <- seq(z_min, z_max, length.out = 1000)
      a  <- parameters[2]
      xm <- parameters[1]
      dens <- (a*(xm)^a)/sup^(a+1)
      dens <- plot_factor * dens
      pd <- data.frame(support = sup, density = dens)
      
      title <- paste("Disaster Method", d)
      plott <- ggplot() +
        geom_histogram(data = g_cdist, aes(x = z),
                       fill = plot_color,  # Use specified color for the plot
                       alpha = 0.5) +
        ggtitle(title) +
        scale_y_continuous(expand = c(0, 0)) +
        xlab("z") +
        geom_line(data = pd, aes(x = support, y = density),
                  colour = "blue") +
        scale_y_continuous(expand = c(0, 0),
                           sec.axis = sec_axis(trans = ~ ./plot_factor,
                                               name = "scaled density - pareto"))
      
      plotr[[i]] <- plott
      print(plott)
      ggsave(paste0("../Figures/fig_pipareto_", d, ".pdf"), device = "pdf")
    }
    
    i <- i + 1
  }
  
  dstats <- t(dstats)
  colnames(dstats) <- paste0("Method ", ds)
  k1 <- stargazer(dstats, type = "html" ,summary = FALSE, float = FALSE,
                  out = "../input/dstats.tex")
  
  pareto_parameters <- t(pareto_parameters)
  colnames(pareto_parameters) <- paste0("Method ", ds)
  k2 <- stargazer(pareto_parameters,  type = "html",summary = FALSE, float = FALSE,
                  out = "../input/pareto_parameters.tex")

  




  #k2 <- knitr::kable(pareto_parameters, "html") %>% kable_styling()

    # return(list(`Descriptive Statistics` = k1,
             #    `Pareto Parameters` = k2))

}

```



#### Fit z's distribution conditional on inflation disaster, using Barro (2006) data


#### Collect only input for overlap years 
##### input distributed evenly through the disaster period

```{r pareto_b6_subset_sample_uniform}

fit_cd("b6", agg = 1, plot_factor = 1, plot_disaster = disasters_plots[1], plot_colors = clrs)

```


